#delimit ;
clear ;
set more off ;

cd "$Mydirectory1/1_DataSources/ConsumptionSurvey_1936/";

/********************************************************************

*****************	URBAN SAMPLE

********************************************************************/  

***Load the urban raw file, then run the supplemental syntax file stored in the same folder ;
use "./RawData/08908-0001-Data.dta"  ;
do "./RawData/08908-0001-Supplemental_syntax.do" ;

***Race indicator: 1 when V124 equals 1 and 0 for every other value ;
***NOTE(review): a missing V124 is therefore coded 0 (Non-white) - confirm this is intended ;
label define white 1 "White" 0 "Non-white" ;
gen white=(V124==1) ;
label values white white ;

***City code 250 looks like a typo and should be 200 (Atlanta) ;
replace V3=200 if V3==250 ;

***Keep only households whose husband age is strictly between 29 and 51 ;
gen  husband_age=V13 ;
drop if !(husband_age>29 & husband_age<51) ;

***Geography ;
ren V4 state ;
ren V3 city ;
ren V6 sample_type ;

***Occupation text (lower-cased), industry, self_or_wage flag and total family income ;
gen occ1=lower(V128) ;
gen ind1=V129 ;
gen self_or_wage= V130 ;
gen tot_fam_inc=V234 ;

***Sources of non-wage income ;
gen inc_home_work=V227 ;
gen inc_interest=V228 ;
gen inc_profits=V229 ;
gen inc_rents=V230 ;
gen inc_pensions=V231 ;
gen inc_gifts=V232 ;
gen inc_losses=V233 ;

label var inc_home_work "Income from work at home" ;
label var inc_interest "Income from interest, dividends" ;
label var inc_profits "Income from profits" ;
label var inc_rents "Income from renting property" ;
label var inc_pensions "Income from pensions" ;
label var inc_gifts "Cash gifts from non-family members" ;
label var inc_losses "Losses" ;

***Net non-wage income. Every component is referenced by its full variable name ;
***so the sum does not depend on variable abbreviation being switched on ;
***Because plain addition is used, the result is missing whenever any component is missing ;
gen inc_nonwage=inc_home_work + inc_interest + inc_profits + inc_rents + inc_pensions + inc_gifts - inc_losses ;

***Label kept to exactly 80 characters, the maximum length of a Stata variable label ;
label var inc_nonwage "Non-wage inc: home work + interest + profits + rents + pensions + gifts - losses" ;

***Placeholder for the harmonized occupation code. It is all missing at this point ;
***and is presumably filled in by the occ-coding script run below ;
gen occ1_harm=. ;

************************************************************** ;
****************	Run the occ-coding script	************* ;
run "./code/1b_occ_coding_1936" ;
************************************************************** ;

***Tabulate using the full variable name so the command still works with varabbrev off ;
***or if the coding script adds other variables whose names start with occ1_h ;
tab occ1_harm, missing ;

*****Diagnostics on earnings versus wages ;
*****Member wage variables are V134, V144, ..., V214 and member type variables are ;
*****V130, V140, ..., V210, so both can be handled in one loop over members 1 to 9 ;
forval x=1/9 { ;
    local wage_no=124+10*`x' ;
    local type_no=120+10*`x' ;

    ren V`wage_no' mem`x'_wagetot ;
    label var mem`x'_wagetot "Self-reported wages of member `x'" ;

    di "`type_no'" ;
    gen mem`x'_type= V`type_no' ;
    tab V`type_no' ;
    tab mem`x'_type ;

    /* copy of the wage variable with wages zeroed out when the member type equals 1 */
    gen mem`x'_wagetot0 = mem`x'_wagetot ;
    replace mem`x'_wagetot0=0 if mem`x'_type==1 ;
    label var mem`x'_wagetot0 "Wages of member `x' but set to zero is self empl." ;
} ;

****Family totals. rowtotal treats missings as zeros, which is what we want ;
egen fam_wagetot=rowtotal(mem*_wagetot) ;
label var fam_wagetot "Total self-reported wages of all fam members" ;
egen fam_wagetot0=rowtotal(mem*_wagetot0) ;
label var fam_wagetot0 "Totl wages of all fam members, self-employed coded as zero" ;

****Wage totals as a share of total family income ;
gen share_wage=fam_wagetot/tot_fam_inc ;
gen share_wage0=fam_wagetot0/tot_fam_inc ;

****Overwrite the generic loop labels for the first family member ;
label var mem1_wagetot "Self-reported wages of first fam member" ;
label var mem1_wagetot0 "Self-reported wages of first fam member, self-emp coded as zero" ;

******************HOUSEHOLD COMPOSITION**************** ;
****Three child counts are built for persons 3 through 14 of the household list ;
****  totkids   counts persons whose relationship label is one of the child codes ;
****  totkids_2 counts persons whose age is under 18 ;
****  totkids_3 counts persons meeting both conditions ;
****Relationship labels are in V21, V27, ..., V87 and ages are in V23, V29, ..., V89 ;

****Relationship codes treated as a child of the household ;
local kid_codes "1, 4, 7, 8, 18, 16, 26, 43, 45, 50, 53, 57, 60, 61, 70, 76, 87, 99, 100, 108, 111" ;

****Child flag based on the relationship label ;
forval x=3/14 { ;
    local relvar=21 + (`x'-3)*6 ;
    gen kid`x'=inlist(V`relvar', `kid_codes') ;
    label var kid`x' "Person `x' is a child of HH (based on label)" ;
} ;

egen totkids=rowtotal(kid3 kid4 kid5 kid6 kid7 kid8 kid9 kid10 kid11 kid12 kid13 kid14) ;

*****Alternative calculation of # of kids (all HH members under 18) ;
*****A missing age compares as larger than 18, so missing ages are never flagged ;
forval x=3/14 { ;
    local agevar=23 + (`x'-3)*6 ;
    gen kid`x'_2=V`agevar'<18 ;
    label var kid`x'_2 "Person `x' is a child of HH (based on age)" ;
} ;

*****The sum uses kid11_2. It previously used the label-based kid11 for person 11 ;
egen totkids_2=rowtotal(kid3_2 kid4_2 kid5_2 kid6_2 kid7_2 kid8_2 kid9_2 kid10_2 kid11_2 kid12_2 kid13_2 kid14_2) ;

*****Alternative calculation of # of kids (all HH members under 18 AND labeled as kids) ;
forval x=3/14 { ;
    local relvar=21 + (`x'-3)*6 ;
    local agevar=23 + (`x'-3)*6 ;
    gen kid`x'_3=V`agevar'<18 & inlist(V`relvar', `kid_codes') ;
    label var kid`x'_3 "Person `x' is child of HH (based on age and label)" ;
} ;

*****The sum uses kid11_3. It previously used the label-based kid11 for person 11 ;
egen totkids_3=rowtotal(kid3_3 kid4_3 kid5_3 kid6_3 kid7_3 kid8_3 kid9_3 kid10_3 kid11_3 kid12_3 kid13_3 kid14_3) ;

label var totkids "# kids based on relation label" ;
label var totkids_2 "# kids based on age<18" ;
label var totkids_3 "# kids based on relation label AND age" ;

***************	HOUSEHOLD SIZE ************* ;
****A listed person is counted as present when the age variable is non-missing ;
****Loop over the household age variables: V23, V29, ..., V89 ;
local members "" ;
forval x=3/14 { ;
    local age_no=23 + (`x'-3)*6 ;
    gen per`x'=0 ;
    replace per`x'=1 if V`age_no'<. ;
    label var per`x' "Person `x' exists (i.e., has non-missing age)" ;
    local members "`members' per`x'" ;
} ;

****Husband and wife are flagged from V13 and V17 in the same way ;
gen husband_exists=(V13<.) ;
gen wife_exists=(V17<.) ;

****Household size is the number of flagged members ;
egen hh_size=rowtotal(husband_exists wife_exists `members') ;

****House ownership: 1 when V120 equals 1, 0 for other values, missing when V120 is missing ;
gen own_home=(V120==1) if V120<. ;

***Keep only the variables needed later, which simplifies combining with the rural file ;
keep state city sample_type
white husband_age wife_exists hh_size totkids* own_home
occ1 occ1_harm ind1 self_or_wage
tot_fam_inc *wagetot* share_wage ;

***Flag every record in this file as belonging to the urban sample ;
gen urban_samp=1 ;

***Detailed summaries as a quick check on the wage share and income distributions ;
summarize share_wage, detail ;
summarize tot_fam_inc, detail ;

***Store the urban sample in a temporary file ;
tempfile urban ;
save `urban' ;

/********************************************************************

*****************	FARMER/RURAL SAMPLE

********************************************************************/ ;


use "./RawData/08908-0004-Data.dta" , clear;

gen occ1=lower(V105)  ;

***white is 1 when V101 equals 1, 0 for any other positive code, ;
***and missing when V101 is zero, negative or missing ;
gen white=V101==1 if V101>0 & V101<. ;
***The indicator is 0/1, so Non-white must be attached to 0 (it was attached to 2, ;
***which left the zeros unlabeled). This matches the definition used for the urban sample ;
label define white 1 "White" 0 "Non-white" ;
label values white white ;
gen tot_fam_inc=V285 ;
tab white, sum(tot_fam_inc) ;

***Same husband-age restriction as the urban sample: strictly between 29 and 51 ;
ren V21 husband_age ;
keep if husband_age>29 & husband_age<51 ;

gen self_or_wage=V107 ;
ren V14 place_type ;
ren V12 state ;

***Placeholder for the harmonized occupation code, all missing until coded below ;
gen occ1_harm=. ;

***Copy of V10 that carries the V10 value label ;
gen city_name_rur_samp=V10 ;
label values city_name_rur_samp V10 ; 

gen ind1=V106 ;

label var ind1 "Worker 1 industry" ;

****************	Run the occ-coding script	************* ;
run "./code/1b_occ_coding_1936" ;
************************************************************** ;


***As first step, assign *EVERYONE* in the rural/Farm sample as farmers (NOT farm laborers) ;
***This sets code 81 where place_type equals 3. Full variable names are used so the ;
***command does not depend on variable abbreviation ;
replace occ1_harm=81 if place_type==3 ;
****Now, code some back as farm laborers if that seems appropriate;
****Any record whose occupation text exactly matches an entry below gets code 71 ;
****Repeated entries in the list are harmless because the same replace is simply run again ;


local farm_labor_list `" "farm laborer" "farm hand" "laborer general farm" 
"cotton chopper" "farm hand" "farm laborer" "general farm labor"
"general ranch work" "fruit picker" "picking hops" "cleaning grass seed" "laborer farm work"
"farm work s" "day laborer" "farm labour" "picked cotton" "picking cotton" "s odd jobs"
plowed "farm laborer" "laborer odd jobs" "working at gin s"  "common labor" "laborer"
"farm hand" "odd jobs" "seasonal farm labor" "farm labor" "picking hops" "labor farmer"
"citrus fruit picker" "unskilled labor sorting beans" harvesting "day labor" labourer "harvest labor"
"chick sorter" labor "plowing,butchering,cutting grain" "farm labour for neighbours"
"plowing, general farm work" "farm work" "day laborer" "picked cotton" 
"farm work pulling corn for neighbor" "picking tobacco" "day labourer on farm" thrasher "' ; 

foreach name in `farm_labor_list' { ;
    replace occ1_harm=71 if occ1=="`name'" ;
} ;

***Check the resulting codes among place_type 2 records that have an occupation string ;
tab occ1_harm if place_type==2 & occ1~="", missing ;

*****Wage earnings ;
*****Individual earnings for each family member are in V111, V121, ..., V191 ;
*****Type of employment for each member is in V117, V127, ..., V197 ;
*****Unlike the Urban file, this dataset codes missing wages as 99999.99 for mem1 ;
*****and as 9999.99 or 999.99 for the other members, so those values are set to missing ;
forval x=1/9 { ;
    ren V1`x'7 mem`x'_type ;
    ren V1`x'1 mem`x'_wagetot ;

    replace mem`x'_wagetot=.
        if abs(mem`x'_wagetot-99999.99)<.01
         | abs(mem`x'_wagetot-9999.99)<.01
         | abs(mem`x'_wagetot-999.99)<.01 ;

    /* copy of the wage variable with non-missing wages zeroed unless the member type equals 2 */
    gen mem`x'_wagetot0=mem`x'_wagetot ;
    replace mem`x'_wagetot0=0 if mem`x'_type!=2 & mem`x'_wagetot!=. ;
} ;

****Family totals. rowtotal treats missings as zeros, which is what we want ;
egen fam_wagetot=rowtotal(mem*_wagetot) ;
label var fam_wagetot "Total self-reported wages of all fam members" ;
egen fam_wagetot0=rowtotal(mem*_wagetot0) ;
label var fam_wagetot0 "Totl wages of all fam members, self-employed coded as zero" ;

****Wage totals as a share of total family income ;
gen share_wage=fam_wagetot/tot_fam_inc ;
gen share_wage0=fam_wagetot0/tot_fam_inc ;

label var mem1_wagetot "Self-reported wages of first fam member" ;
label var mem1_wagetot0 "Self-reported wages of first fam member, self-emp coded as zero" ;

summarize share_wage, detail ;
summarize tot_fam_inc, detail ;


****Identify farmers as those having substantial farm income ;
****Codebook note: for farm households total family income should equal the total of ;
****V192, V206, V234 and V284 less V247 ;
****Note however that V206 is always missing (not just non-numeric, but blank), ;
****so it is left out of the sums below ;

ren V192 emp_earn_ex_farm ;
ren V206 fam_vill_oth_inc ;
ren V234 fam_farm_inc_tot ;
ren V284 fam_val_farm_own_use ;
ren V247 fam_exp_tot ;

****Reconstructed total income, for comparison against tot_fam_inc ;
****Full variable names are used so the sums do not depend on variable abbreviation ;
gen inc_check= emp_earn_ex_farm + fam_farm_inc_tot + fam_val_farm_own_use - fam_exp_tot ;

****Net farm income: farm income plus value of farm products for own use, less expenses ;
gen net_farm_inc=fam_farm_inc_tot + fam_val_farm_own_use - fam_exp_tot ;

****Net farm income as a share of total family income ;
gen net_farm_share=net_farm_inc/tot_fam_inc ;

****************	HOUSEHOLD COMPOSITION	**************** ;
****Three child counts are built for persons 3 through 10 of the household list ;
****Relationship labels are in V27, V32, ..., V62 and ages are in V29, V34, ..., V64 ;

****Relationship codes treated as a child of the household ;
local kid_codes "1, 4, 7, 8, 18, 16, 26, 43, 45, 57, 60, 61, 71,  77, 88, 109, 130, 201, 202, 207, 208, 223, 229, 240, 257" ;

****Count 1: tag someone as a kid if that is their label in the household list ;
forval x=3/10 { ;
    local rel_no=27 + (`x'-3)*5 ;
    gen kid`x'=0 ;
    replace kid`x'=1 if inlist(V`rel_no', `kid_codes' ) ;
    label var kid`x' "Person `x' is a child of HH (based on label)" ;
} ;

egen totkids=rowtotal(kid3 kid4 kid5 kid6 kid7 kid8 kid9 kid10 );

****Count 2: all HH members with age under 18 ;
forval x=3/10 { ;
    local age_no=29 + (`x'-3)*5 ;
    gen kid`x'_2=0 ;
    replace kid`x'_2=1 if V`age_no'<18 ;
    label var kid`x'_2 "Person `x' is a child of HH (based on age)" ;
} ;

egen totkids_2=rowtotal(kid3_2 kid4_2 kid5_2 kid6_2 kid7_2 kid8_2 kid9_2 kid10_2);

****Count 3: HH members who are under 18 AND carry a child label ;
forval x=3/10 { ;
    local rel_no=27 + (`x'-3)*5 ;
    local age_no=29 + (`x'-3)*5 ;
    gen kid`x'_3=0 ;
    replace kid`x'_3=1 if V`age_no'<18 & inlist(V`rel_no', `kid_codes' ) ;
    label var kid`x'_3 "Person `x' is a child of HH (based on age AND label)" ;
} ;

egen totkids_3=rowtotal(kid3_3 kid4_3 kid5_3 kid6_3 kid7_3 kid8_3 kid9_3 kid10_3);

label var totkids "# kids based on relation label" ;
label var totkids_2 "# kids based on age<18" ;
label var totkids_3 "# kids based on relation label AND age" ;


*************** TOTAL HOUSEHOLD SIZE ****************** ;
****A listed person counts as present when the age variable is below 99.9, ;
****which is the missing code used for ages in this file (the urban file uses true missing) ;
****Ages for persons 3 to 10 are in V29, V34, ..., V64 ;
forval x=3/10 { ;
    local age_no=29 + (`x'-3)*5 ;
    gen per`x'=0 ;
    replace per`x'=1 if V`age_no'<99.9 /* note different missing code */ ;
    label var per`x' "Person `x' exists (i.e., age not missing)" ;
} ;

****NOTE(review): wife_exists tests V24 against system missing, while the loop above ;
****treats 99.9 as missing - confirm that V24 does not also use the 99.9 code ;
gen husband_exists=(husband_age<.) ;
gen wife_exists=(V24<.) ;

******Home ownership: 1 when V97 equals 1, 0 for other values, missing when V97 is missing ;
gen own_home=(V97==1) if V97<. ;

*****Husband education ;
***Builds attainment indicators from V840. Every indicator is left missing when V840 ;
***is 43 or higher, which includes missing values ;
***Clean up this variable a bit ;
***Note that 0-16 seem to be correct ;
***Codes beyond 42 seem meaningless/illegible ;
gen finish_6th=V840>5 if V840<43 /* finished 6th grade*/ ;
gen no_hs_att=V840<9 if V840<43 /*never went to HS */ ;
gen some_hs=V840>8 & V840<12 if V840<43 /*some HS*/ ;
***hs_grad is 1 for every code above 11, so it also covers the special codes up to 42 ;
gen hs_grad=V840 >11 if V840<43 /*HS grad*/ ;
***some_coll is 1 for codes 13 to 15 and for the special codes 26, 30, 31, 32 and 33 ;
gen some_coll=(V840>12 & V840<16) | inlist(V840, 26, 30, 31, 32, 33) 
if V840<43;

/**** ;
***NOTE: Coding all these as some college

 25 Dental college
          26 12\ Business college
          30 1 B college
          31 .5 years college
          32 2Y Y Normal
          33 Business school

NOTE(review): code 25 (Dental college) is listed here but is matched by neither
some_coll nor coll_ba in the code - confirm whether that is intended
****/ ;
***We will code anyone with more than 4 years as having BA ;
***coll_ba is 1 for codes 16 to 24 and for the special codes 41 and 42 ;

gen coll_ba=(V840>15 & V840<25)	|  inlist(V840, 41, 42) if V840<43;

*****Do you own any acres;
*****Note they do not seem to ask home ownership for farmers ;
*****So use this as proxy instead ;

*****Acres are taken from V249 only where place_type equals 3, and are missing otherwise ;
*****The full name place_type is used so the condition does not depend on variable abbreviation ;
gen acres_owned=V249 if place_type==3 /*NOT recorded for non-farmers*/ ;
gen own_any_acres=acres_owned>0 if acres_owned<. ;

*****Household size is the number of flagged members: husband, wife and persons 3 to 10 ;
egen hh_size=rowtotal(husband_exists wife_exists per3 per4 per5 per6 per7 per8 per9 per10) ;

*****Radio ownership: 1 when V1295 equals 1, 0 for other values, missing when V1295 is missing ;
gen own_radio=V1295==1 if V1295<. ;


/***************************************************

*******		Append the urban sample ;

***************************************************/ ;

***Print the V12 value label (V12 was renamed to state earlier in this file) ;
label list V12 ;

***Tag the records in memory as source 2, append the urban tempfile, ;
***then tag the appended records, whose source is still missing, as source 1 ;
label define source 1 "Urban" 2 "Village/Farm" ;
gen source=2 ;
append using `urban' ;
replace source=1 if missing(source) ;
label values source source ;
label var source "Data source" ;

***South versus nonsouth (Census region definition) ;
***South is state code 11 (Delaware) plus codes 40 through 60 ;
gen south=0 ;
replace south=1 if (state>39 & state<61) /*rest of South*/ | state==11 /*Delaware*/ ;

***Remaining region dummies, all built from ranges of the state code ;
gen west=(state>60 & state<83) ;
gen midwest=(state>14 & state<39) ;
gen northeast=(state<11) | (state>11 & state<15) ;

***Gen deep south ;
gen deep_south=inlist(state, 41, 44, 45, 46, 48 ) ;

***Single region variable: 1 northeast, 2 midwest, 3 south, 4 west ;
***It stays missing when none of the four dummies equals 1 ;
gen region=. ;
local r=0 ;
foreach dummy in northeast midwest south west { ;
    local ++r ;
    replace region=`r' if `dummy'==1 ;
} ;

save "./RawData/pooled_data_w_occ_codes", replace ;
